ReduceScatter

对输入数组进行指定类型的分布式归约操作(Reduce),并将结果分散到各输出位置。支持 ReduceSum、ReduceMean、ReduceMax 和 ReduceMin。

输入:
  • input_data - 输入数据地址。

  • data_size - 数据长度。

  • reduce_type - 归约类型:
    • 0: ReduceSum

    • 1: ReduceMean

    • 2: ReduceMax

    • 3: ReduceMin

  • core_mask - 核掩码(仅适用于共享存储版本)。

输出:
  • output_data - 输出数据地址。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 fp, dp, i8, i16, i32

  • MT7004 支持hp, fp, i16, i32

共享存储版本:

void fp_reducescatter_s(float *input_data, float *output_data, int data_size, int reduce_type, int core_mask)
void hp_reducescatter_s(half *input_data, half *output_data, int data_size, int reduce_type, int core_mask)
void dp_reducescatter_s(double *input_data, double *output_data, int data_size, int reduce_type, int core_mask)
void i8_reducescatter_s(int8_t *input_data, int8_t *output_data, int data_size, int reduce_type, int core_mask)
void i16_reducescatter_s(int16_t *input_data, int16_t *output_data, int data_size, int reduce_type, int core_mask)
void i32_reducescatter_s(int *input_data, int *output_data, int data_size, int reduce_type, int core_mask)

C调用示例:

 1#include <stdio.h>
 2#include <reducescatter.h>
 3
 4int main() {
 5    float *input = (float *)0xA0000000;     // 输入在DDR空间
 6    float *output = (float *)0xC0000000;
 7    int data_size = 1024;
 8    int reduce_type = 0;                     // ReduceSum
 9    int core_mask = 0xff;
10
11    fp_reducescatter_s(input, output, data_size, reduce_type, core_mask);
12    return 0;
13}

私有存储版本:

void fp_reducescatter_p(float *input_data, float *output_data, int data_size, int reduce_type)
void hp_reducescatter_p(half *input_data, half *output_data, int data_size, int reduce_type)
void dp_reducescatter_p(double *input_data, double *output_data, int data_size, int reduce_type)
void i8_reducescatter_p(int8_t *input_data, int8_t *output_data, int data_size, int reduce_type)
void i16_reducescatter_p(int16_t *input_data, int16_t *output_data, int data_size, int reduce_type)
void i32_reducescatter_p(int *input_data, int *output_data, int data_size, int reduce_type)

C调用示例:

 1#include <stdio.h>
 2#include <reducescatter.h>
 3
 4int main() {
 5    float *input = (float *)0x10810000;      // 输入在L2空间
 6    float *output = (float *)0x10820000;
 7    int data_size = 1024;
 8    int reduce_type = 0;                      // ReduceSum
 9
10
11    fp_reducescatter_p(input, output, data_size, reduce_type);
12    return 0;
13}